// do /Users/shoude/Dropbox/eegap/EEgap_data_code_heter_SM/Code/sumstats_ee_2012_2022.do

/*
Check what there is in CCMS public that LBNL shared with us
*/

/*
Variable definition:
AV: adjusted volume, which is used in the definition of the MEPS 
kwh_y: kwh/year
meps_2014: MEPS set in September 2014
meps_2001: MEPS set in January 2001
pd_class_pre_2014: product class used to define the 2001 MEPS
pd_class_post_2014: product class used to define the 2014 MEPS
datecertified: Date of certification
date_first_collection: Date a model was first scraped: our proxy for model entry 
date_last_collection: Date a model was last scraped: our proxy for model exit  
duration_scrap = date_last_collection-date_first_collection: Number of days on the market
*/

pause on

*global  pathdata_1 = "/Users/shoude/Dropbox/eegap/EEgap_data/Prospector_LBNL"
*global  pathresults = "/Users/shoude/Dropbox/eegap/EEgap_data_code_heter_SM/Results_estimation"

global  pathdata_1 = "C:\Users\erica.myers\Dropbox\Appliance_EnergyPrice\EEgap_data\Prospector_LBNL"
global  pathresults = "C:\Users\erica.myers\Dropbox\Appliance_EnergyPrice\EEgap_data_code_heter_SM\Results_estimation"
global overleafresults = "C:\Users\erica.myers\Dropbox\Apps\Overleaf\Heterogeneous (Mis-) Perceptions of Energy Costs\Tables"


use $pathdata_1/Prospector_productData_w_attributes, clear

*drop duplicate observations
duplicates drop

sum kwh_y if year_first_collection==2012
sum kwh_y if year_first_collection==2013
sum kwh_y if year_first_collection==2014
sum kwh_y if year_first_collection==2015
sum kwh_y if year_first_collection==2016

gen kwh_y1 = kwh_y if year_first_collection==2013
gen kwh_y2 = kwh_y if year_first_collection==2015
sdtest kwh_y1==kwh_y2


sum kwh_y if year_first_collection==2012&meps_2001~=.
sum kwh_y if year_first_collection==2013&meps_2001~=.
sum kwh_y if year_first_collection==2014&meps_2014~=.
sum kwh_y if year_first_collection==2015&meps_2014~=.
sum kwh_y if year_first_collection==2016&meps_2014~=.

sum kwh_y if year_first_collection < 2014
sum kwh_y if year_first_collection > 2014

sum kwh_y if meps_2001 ~=.
sum kwh_y if meps_2014 ~=.


sum AV if year_first_collection==2012
sum AV if year_first_collection==2013
sum AV if year_first_collection==2014
sum AV if year_first_collection==2015
sum AV if year_first_collection==2016

gen meps_rel_2001 = -100*(kwh_y - meps_2001)/meps_2001
gen meps_rel_2014 = -100*(kwh_y - meps_2014)/meps_2014

sum meps_rel_2001 if year_first_collection==2012
sum meps_rel_2001 if year_first_collection==2013
sum meps_rel_2014 if year_first_collection==2014
sum meps_rel_2014 if year_first_collection==2015
sum meps_rel_2014 if year_first_collection==2016

gen kwh_per_AV = kwh_y/AV

*Let's call the model in the data set if it hasn't exited yet--try to create an idea of a panel

*For models in the data set in 2012
sum kwh_y if year_first_collection<=2012&year_last_collection>2012
sum AV if year_first_collection<=2012&year_last_collection>2012
sum kwh_per_AV if year_first_collection<=2012&year_last_collection>2012
*sum meps_rel_2001 if year_first_collection<=2012&year_last_collection>2012

*For models in the data set in 2013
sum kwh_y if year_first_collection<=2013&year_last_collection>2013
sum AV if year_first_collection<=2013&year_last_collection>2013
sum kwh_per_AV if year_first_collection<=2013&year_last_collection>2013

*sum meps_rel_2001 if year_first_collection<=2013&year_last_collection>2013

*For models in the data set in 2014
sum kwh_y if year_first_collection<=2014&year_last_collection>2014
sum AV if year_first_collection<=2014&year_last_collection>2014
sum kwh_per_AV if year_first_collection<=2014&year_last_collection>2014

*For models in the data set in 2015
sum kwh_y if year_first_collection<=2015&year_last_collection>2015
sum AV if year_first_collection<=2015&year_last_collection>2015
sum kwh_per_AV if year_first_collection<=2015&year_last_collection>2015

*For models in the data set in 2016
sum kwh_y if year_first_collection<=2016&year_last_collection>2016
sum AV if year_first_collection<=2016&year_last_collection>2016
sum kwh_per_AV if year_first_collection<=2016&year_last_collection>2016

count if year_first_collection<=2015&year_last_collection>=2015
count if year_first_collection<=2013&year_last_collection>=2013

count if year_first_collection<=2015&year_last_collection>=2015&kwh_y~=.
count if year_first_collection<=2013&year_last_collection>=2013&kwh_y~=.

gen kwh_per_AV2 = kwh_per_AV if year_first_collection<=2015&year_last_collection>=2015
gen kwh_per_AV1 = kwh_per_AV if year_first_collection<=2013&year_last_collection>=2013

gen kwh_2015 = kwh_y if year_first_collection<=2015&year_last_collection>=2015
gen kwh_2013 = kwh_y if year_first_collection<=2013&year_last_collection>=2013

sdtest kwh_per_AV1==kwh_per_AV2
sdtest kwh_2015==kwh_2013

**********************************************************************************************************************************************************
*TABLE AX: SUMMARY STATS BEFORE VS. AFTER 2014 STANDARD
**********************************************************************************************************************************************************
eststo clear

estpost tabstat kwh_2013 kwh_2015 kwh_per_AV1 kwh_per_AV2, c(stat) stat(mean sd min max n)
#delimit ;
esttab using "$overleafresults\LBNL_sumstat_check.tex",
replace /// 
cells("mean(fmt(2)) sd(fmt(2)) min(fmt(0)) max(fmt(0)) count(fmt(0))") ///
 nonumber ///
 nomtitles ///
 nonotes ///
 booktabs ///
 noobs ///
 title("Summary Statistics by Year Available\label{t:LBNLsumstat}")
 collabels("Mean" "SD" "Min" "Max" "Count") ///
 addnote("Source: LBNL Refrigerator Model and Attribute Data") ///
 coeflabels(kwh_2013 "2013 kWh/year" kwh_2015 "2015 kWh/year" kwh_per_AV1 "2013 kWh/year/ft$^3$" kwh_per_AV2 "2015 kWh/year/ft$^3$")
 ;
#delimit cr 


sum kwh_2013 if type=="Compact Fridge"
sum kwh_2013 if type=="Top Freezer"
sum kwh_2013 if type=="Side Freezer"
sum kwh_2013 if type=="Bottom Freezer"

sum kwh_2015
sum kwh_2015 if type=="Compact Fridge"
sum kwh_2015 if type=="Top Freezer"
sum kwh_2015 if type=="Side Freezer"
sum kwh_2015 if type=="Bottom Freezer"

**********************************************************************************************************************************************************
*TABLE AX: COMPARISON OF VARIANCE IN BEFORE VS. AFTER 2014 STANDARD
**********************************************************************************************************************************************************


est clear

sdtest kwh_2013==kwh_2015

scalar pval1 = round(r(p_u),0.0001)
local pval1: di %5.4f pval1

scalar sd_control1 = round(r(sd_1),.01)
local sd_control1: di %4.3f sd_control1

scalar sd_treat1 = round(r(sd_2),.01)
local sd_treat1: di %4.3f sd_treat1

scalar ftest1 = round(r(F),.0001)
local ftest1: di %4.3f ftest1

scalar obs1 = round(r(N),.01)
local obs1: di %4.3f obs1

sdtest kwh_per_AV1==kwh_per_AV2

scalar Pval = round(r(p_u),0.0001)
local pval2: di %5.4f Pval

scalar sd_control2 = round(r(sd_1),.01)
local sd_control2: di %4.3f sd_control2

scalar sd_treat2 = round(r(sd_2),.01)
local sd_treat2: di %4.3f sd_treat2



scalar ftest2 = round(r(F),.0001)
local ftest2: di %4.3f ftest2

scalar obs2 = round(r(N),.01)
local obs2: di %4.3f obs2

matrix input sdtest = (`sd_control1',`sd_treat1', `ftest1', `pval1', `obs1' \ `sd_control2',`sd_treat2', `ftest2', `pval2', `obs2')
matrix colnames sdtest = "SD 2013" "SD 2015" "F-Statistic" "P-value" "Observations"
matrix rownames sdtest = "kWh per year" "kWh per year/adjusted volume"
esttab matrix(sdtest) using  "$overleafresults\table_sdtest_check.tex", replace ///
   compress nonote noobs gap label booktabs f   ///
   nomtitles


/*

*We have some more work to do here:
*But it would be interesting to compute the MEPS_2001 for models
*that entered the market after 2014, and vice-versa
*i.e., compute the MEPS_2014 for models that entered before 2014
*This would tell us that if all models were to just meet the 
*two MEPS, how variable would be the choice set.

*These sum stats have been computed as follows: meps_2001 is for models certified before 2014.
*Note that the fact that year_first_collection==2016 means that the scrapping algo of LBNL just
*found them in 2016.
*meps_2014 is for models certified after 2014.
*Note that the fact that year_first_collection==2016 means that the scrapping algo of LBNL
*found them before 2014. 
*I assigned the meps_2001 and meps_2014 using the certified date


sum meps_2001 if year_first_collection==2012
sum meps_2001 if year_first_collection==2013
sum meps_2001 if year_first_collection==2014
sum meps_2001 if year_first_collection==2015
sum meps_2001 if year_first_collection==2016

sum meps_2014 if year_first_collection==2012
sum meps_2014 if year_first_collection==2013
sum meps_2014 if year_first_collection==2014
sum meps_2014 if year_first_collection==2015
sum meps_2014 if year_first_collection==2016

*Same as above but using year_certified
*Note that the LBNL data is problematic
*they don't have models certified in the 
*years between 2016-2019.
sum meps_2001 if year_certified==2012
sum meps_2001 if year_certified==2013
sum meps_2001 if year_certified==2014
sum meps_2001 if year_certified==2015
sum meps_2001 if year_certified==2016

sum meps_2014 if year_certified==2012
sum meps_2014 if year_certified==2013
sum meps_2014 if year_certified==2014
sum meps_2014 if year_certified==2015
sum meps_2014 if year_certified==2016

